#' ---------------------------------------------------------
#' Does Sustainability Generate Better Financial Performance? 
#' Review, Meta-analysis, and Propositions
#' 
#' This file loads the audited database with codes from the 
#' Excel spreadsheet used after the DistillerSR analysis.
#' The script creates all necessary objects, cleans the
#' data, and runs the regression tables.
#' 
#' Contact: Ulrich Atz, ua6@stern.nyu.edu 
#' Last edit: 2021-08-30
#' ---------------------------------------------------------
#' sessionInfo()
#' 
#' R version 4.1.2 (2021-11-01)
#' Platform: aarch64-apple-darwin20 (64-bit)
#' Running under: macOS Monterey 12.0.1
#' 
#' Matrix products: default
#' LAPACK: /Library/Frameworks/R.framework/Versions/4.1-arm64/Resources/lib/libRlapack.dylib
#' 
#' locale:
#'   [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
#' 
#' attached base packages:
#'   [1] stats     graphics  grDevices utils     datasets 
#' [6] methods   base     
#' 
#' other attached packages:
#'   [1] stargazer_5.2.2    MASS_7.3-54       
#' [3] summarytools_1.0.0 janitor_2.1.0     
#' [5] forcats_0.5.1      stringr_1.4.0     
#' [7] dplyr_1.0.7        purrr_0.3.4       
#' [9] readr_2.0.2        tidyr_1.1.4       
#' [11] tibble_3.1.5       ggplot2_3.3.5     
#' [13] tidyverse_1.3.1   
#' ---------------------------------------------------------

 
# Install packages if not installed 
library(tidyverse)
library(janitor)
library(summarytools)
library(MASS)
library(stargazer)

study_vars <- c("refid", "user", "author", "title", "abstract")

rd <- read_csv("data/ESG-studies-database.csv", 
                 na = "") %>% 
  mutate(res_o = ordered(res_o),
         res_o = fct_relevel(res_o, "Negative", "Mixed", "Neutral", "Positive"),
         res_g = ordered(res_g),
         res_g = fct_relevel(res_g, "Negative", "Neutral/mixed", "Positive"))

# Corporate studies only
rd.cor <- rd %>% filter(funds == 0)
# Investor studies only
rd.inv <- rd %>% filter(funds == 1)
# Climate change papers only  
rd.cc <- rd %>% filter(cc_papers == 1)

#-------------------------------------------------------------------
# Results
#-------------------------------------------------------------------
# Figure 2
with(rd, ctable(funds, res_o, round.digits = 0))

# For the sign test
table(rd$res_g, rd$funds)

#                C  I
# Negative       9 13
# Neutral/mixed 47 47
# Positive      85 37


# Hardcoded because reasons
# Binomial test
binom.test(85, 141, 0.33) # Corporate
binom.test(37, 97, 0.33)  # Investor

# Wilcoxon signed rank test 
wilcox.test(as.numeric(rd.cor$res_g), 
            mu = mean(as.numeric(rd.cor$res_g)), 
            correct = FALSE) # Corporate
wilcox.test(as.numeric(rd.inv$res_g), 
            mu = mean(as.numeric(rd.inv$res_g)), 
            correct = FALSE) # Investor

# Confidence intervals
conf_int_fpc <- function(p, n, N, alpha = 0.05) {
  fpc <- ((N-n)/(N-1))^(1/2)
  ci <- round(qnorm(c(alpha/2, (1-alpha/2)), p, sqrt((p*(1-p))/n) * fpc), 3)
  moe <- round(sqrt((p*(1-p))/n) * fpc * qt(1-alpha/2, n), 4)
  return(list(`lower and upper bound` = ci, `margin of error` = moe))
}

# Adjust population size proportionally
conf_int_fpc(0.60, 141, 1141 - 159) # Corporate
conf_int_fpc(0.38,  97, 159) # Investor


#-------------------------------------------------------------------
# Descriptive statistics
#-------------------------------------------------------------------
map_int(lst(rd.cor, rd.inv, rd.cc), nrow)

rd %>% pull(year_sample_start) %>% median(na.rm = T)
rd %>% pull(year_sample_end) %>% median(na.rm = T)

tabyl(rd$global)
tabyl(rd$usa)
tabyl(rd$europe)

#-------------------------------------------------------------------
# Tables and cross-tabs
#-------------------------------------------------------------------

# Table 1 Panel A
# Mediating factors

with(rd.cor, ctable(mf_risk, res_g, round.digits = 0, chisq = TRUE))
with(rd.inv, ctable(mf_risk, res_g, round.digits = 0))

# Table 1 Panel B
# Characteristics

ctable(rd$disclosure,     rd$res_g, round.digits = 0, dnn = c("Overall finding", "ESG disclosure"))
ctable(rd$performance,    rd$res_g, round.digits = 0, dnn = c("Overall finding", "ESG performance"))
ctable(rd$cfp_accounting, rd$res_g, round.digits = 0, dnn = c("Overall finding", "Accounting-based measures"))
ctable(rd$cfp_market,     rd$res_g, round.digits = 0, dnn = c("Overall finding", "Market-based measures"))
ctable(rd$esg_esg,        rd$res_g, round.digits = 0, dnn = c("Overall finding", "ESG score"))

ctable(rd$longterm,  rd$res_g, round.digits = 0, dnn = c("Overall finding", "Implied long-term relationship"))
ctable(rd$time_lag,  rd$res_g, round.digits = 0, dnn = c("Overall finding", "Lagged dependent variable"))
ctable(rd$causality, rd$res_g, round.digits = 0, dnn = c("Overall finding", "Causality"))

ctable(rd.inv$equities,    rd.inv$res_g, round.digits = 0)
ctable(rd.inv$fixedincome, rd.inv$res_g, round.digits = 0)

ctable(rd.inv$active,  rd.inv$res_g, round.digits = 0)
ctable(rd.inv$passive, rd.inv$res_g, round.digits = 0)

ctable(rd.inv$esg_negscree,    rd.inv$res_g, round.digits = 0)
ctable(rd.inv$esg_pooled,      rd.inv$res_g, round.digits = 0)
ctable(rd.inv$esg_integration, rd.inv$res_g, round.digits = 0)

# Table 1 Panel C
# Climate change
with(rd.cc, ctable(decarbonization, res_g, round.digits = 0))


#-------------------------------------------------------
# Regressions
#-------------------------------------------------------

# Table 2

out_cor <- list()

out_cor[[1]] <- polr(res_g ~ funds + cc_papers, rd, Hess=TRUE)
out_cor[[2]] <- polr(res_g ~ funds + cc_papers + disclosure + cfp_accounting + esg_esg + region, rd, Hess=TRUE)
out_cor[[3]] <- polr(res_g ~ funds + cc_papers + longterm + time_lag + causality + notheory + region, rd, Hess=TRUE)
out_cor[[4]] <- polr(res_g ~ funds + cc_papers + mf_risk + mf_opeff + mf_inno + region, rd, Hess=TRUE)
out_cor[[5]] <- polr(res_g ~ funds + cc_papers + disclosure + cfp_accounting + esg_esg + longterm + time_lag + causality + notheory + mf_risk + mf_opeff + mf_inno + region, rd, Hess=TRUE)

stargazer(out_cor, type = "html", out="corporate-ordinal-regression.htm", single.row = TRUE,
          dep.var.labels = c("Overall finding (negative, neutral/mixed, positive)"),
          notes = "Standard errors in parentheses",
          omit        = c("region"),
          omit.labels = c("Region FE"),
          covariate.labels = c("Investor study",
                               "Climate change issue",
                               "ESG disclosure (vs performance)",
                               "Accounting-based (vs market)",
                               "ESG score (vs E/S/G/other)",
                               "Implied long-term relationship",
                               "Lagged dependent variable",
                               "Fixed effects / matching methods / instrumental variables",
                               "No mainstream social science theory",
                               "Mediating factor: Risk",
                               "Mediating factor: Operational efficiency",
                               "Mediating factor: Innovation"))

# Calculate most appropriate pseudo-R-squared
out_cor %>% map_dbl(DescTools::PseudoR2, which = "Nagelkerke") %>% round(3)

exp(coef(out_cor[[1]]))

#---------------------------
# Table 3

out_inv <- list()

out_inv[[1]] <- polr(res_g ~ esg_negscree + esg_pooled + esg_integration, rd.inv, Hess=TRUE)
out_inv[[2]] <- polr(res_g ~ esg_negscree + esg_pooled + esg_integration + cc_papers + region, rd.inv, Hess=TRUE)
out_inv[[3]] <- polr(res_g ~ esg_negscree + esg_pooled + esg_integration + cc_papers + disclosure + cfp_accounting + esg_esg + region, rd.inv, Hess=TRUE)
out_inv[[4]] <- polr(res_g ~ esg_negscree + esg_pooled + esg_integration + cc_papers + active + equities + region, rd.inv , Hess=TRUE)
out_inv[[5]] <- polr(res_g ~ cc_papers + disclosure + cfp_accounting + esg_esg + esg_negscree + esg_pooled + esg_integration + active + equities + region, data = rd.inv)

stargazer(out_inv, type = "html", out="investor-ordinal-regression.htm", single.row = TRUE,
          dep.var.labels = c("Overall finding (negative, neutral/mixed, positive)"),
          notes = "Standard errors in parentheses",
          omit        = c("region"),
          omit.labels = c("Region FE"),
          covariate.labels = c("Negative screening or divesting",
                               "Pooled strategies",
                               "ESG integration",
                               "Climate change issue",
                               "ESG disclosure (vs performance)",
                               "Accounting-based (vs market)",
                               "ESG score (vs E/S/G/other)",
                               "Active management",
                               "Equities"))

car::linearHypothesis(out_inv[[5]], "esg_negscree = esg_integration")
car::linearHypothesis(out_inv[[5]], "esg_pooled   = esg_integration")

out_inv %>% map_dbl(DescTools::PseudoR2, which = "Nagelkerke") %>% round(3)



#-------------------------------------------------------
# Table 4

out_cc <- list()

out_cc[[1]] <- polr(res_g ~ decarbonization + funds, rd.cc, Hess=TRUE)
out_cc[[2]] <- polr(res_g ~ decarbonization + funds + disclosure + cfp_accounting, rd.cc, Hess=TRUE)
out_cc[[3]] <- polr(res_g ~ decarbonization + funds + disclosure + cfp_accounting + region, rd.cc, Hess=TRUE)
out_cc[[4]] <- polr(res_g ~ decarbonization + disclosure + cfp_accounting + esg_negscree + esg_pooled +esg_integration , rd.cc, Hess=TRUE)
out_cc[[5]] <- polr(res_g ~ decarbonization + disclosure + cfp_accounting + active + equities, rd.cc , Hess=TRUE)

stargazer(out_cc, type = "html", out="investor-climate-ordinal-regression.htm", single.row = TRUE,
          dep.var.labels = c("Overall finding (negative, neutral/mixed, positive)"),
          notes = "Standard errors in parentheses",
          omit        = c("region"),
          omit.labels = c("Region FE"),
          covariate.labels = c("Decarbonization",
                               "Investor study",
                               "ESG disclosure (vs performance)",
                               "Accounting-based (vs market)",
                               "Negative screening or divesting",
                               "Pooled strategies",
                               "ESG integration",
                               "Active management",
                               "Equities"))

out_cc %>% map_dbl(DescTools::PseudoR2, which = "Nagelkerke") %>% round(3)

